/*
 * Copyright 1999-2002 Carnegie Mellon University.  
 * Portions Copyright 2002 Sun Microsystems, Inc.  
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved.  Use is subject to license terms.
 * 
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL 
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.linguist.acoustic.tiedstate;

// Placeholder for a package import

import edu.cmu.sphinx.linguist.acoustic.*;
import edu.cmu.sphinx.util.props.*;

import java.io.IOException;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Loads a tied-state acoustic model generated by the Sphinx-3 trainer.
 * <p>
 * It is not the goal of this documentation to provide an explanation about the concept of HMMs. The explanation below
 * is superficial, and provided only in a way that the files in the acoustic model package make sense.
 * <p>
 * An HMM models a process using a sequence of states. Associated with each state, there is a probability density
 * function. A popular choice for this function is a Gaussian mixture, that is, a summation of Gaussians. As you may
 * recall, a single Gaussian is defined by a mean and a variance, or, in the case of a multidimensional Gaussian, by a
 * mean vector and a covariance matrix, or, under some simplifying assumptions, a variance vector. The "means" and
 * "variances" files in the "continuous" directory contain exactly this: a table in which each line contains a mean
 * vector or a variance vector respectively. The dimension of these vectors is the same as the incoming data, the
 * encoded speech signal. The Gaussian mixture is a summation of Gaussians, with different weights for different
 * Gaussians. The "mixture_weights" file contains this: each line contains the weights for a combination of Gaussians.
 * <p>
 * The HMM is a model with a set of states. The transitions between states have an associated probability. These
 * probabilities make up the transition matrices stored in the "transition_matrices" file.
 * <p>
 * The files in the "continuous" directory are, therefore, tables, or pools, of means, variances, mixture weights, and
 * transition probabilities.
 * <p>
 * The dictionary is a file that maps words to their phonetic transcriptions, that is, it maps words to sequences of
 * phonemes.
 * <p>
 * The language model contains information about probabilities of words in a language. These probabilities could be for
 * individual words or for sequences of two or three words.
 * <p>
 * The model definition file in a way ties everything together. If the recognition system models phonemes, there is an
 * HMM for each phoneme. The model definition file has one line for each phoneme. The phoneme could be in a context
 * dependent or independent. Each line, therefore, identifies a unique HMM. This line has the phoneme identification,
 * the non-required left or right context, the index of a transition matrix, and, for each state, the index of a mean
 * vector, a variance vector, and a set of mixture weights.
 */
public class TiedStateAcousticModel implements AcousticModel {

    /** The property that defines the component used to load the acoustic model */
    @S4Component(type = Loader.class)
    public final static String PROP_LOADER = "loader";

    /** The property that defines the unit manager */
    @S4Component(type = UnitManager.class)
    public final static String PROP_UNIT_MANAGER = "unitManager";

    /** Controls whether we generate composites or CI units when no context is given during a lookup. */
    @S4Boolean(defaultValue = true)
    public final static String PROP_USE_COMPOSITES = "useComposites";


    // -----------------------------
    // Configured variables
    // -----------------------------
    protected String name;
    protected Logger logger;
    protected Loader loader;
    protected UnitManager unitManager;
    private boolean useComposites;
    private Properties properties;

    // ----------------------------
    // internal variables
    // -----------------------------
    final transient private Map<String, SenoneSequence> compositeSenoneSequenceCache = new HashMap<String, SenoneSequence>();
    private boolean allocated;

    public TiedStateAcousticModel( Loader loader, UnitManager unitManager, boolean useComposites) {
        this.loader = loader;
        this.unitManager = unitManager;
        this.useComposites = useComposites;
        this.logger = Logger.getLogger(getClass().getName());
    }

    public TiedStateAcousticModel() {

    }

    public void newProperties(PropertySheet ps) throws PropertyException {
        loader = (Loader) ps.getComponent(PROP_LOADER);
        unitManager = (UnitManager) ps.getComponent(PROP_UNIT_MANAGER);
        useComposites = ps.getBoolean(PROP_USE_COMPOSITES);
        logger = ps.getLogger();
    }

    /**
     * initialize this acoustic model with the given name and context.
     *
     * @throws IOException if the model could not be loaded
     */
    public void allocate() throws IOException {
        if (!allocated) {
            loader.load();
            logInfo();
            allocated = true;
        }
    }


    /* (non-Javadoc)
    * @see edu.cmu.sphinx.linguist.acoustic.AcousticModel#deallocate()
    */
    public void deallocate() {
    }


    /**
     * Returns the name of this AcousticModel, or null if it has no name.
     *
     * @return the name of this AcousticModel, or null if it has no name
     */
    public String getName() {
        return name;
    }


    /**
     * Gets a composite HMM for the given unit and context
     *
     * @param unit     the unit for the hmm
     * @param position the position of the unit within the word
     * @return a composite HMM
     */
    private HMM getCompositeHMM(Unit unit, HMMPosition position) {

        Unit ciUnit = unitManager.getUnit(unit.getName(), unit.isFiller(),
                Context.EMPTY_CONTEXT);

        SenoneSequence compositeSequence = getCompositeSenoneSequence(unit,
                position);

        SenoneHMM contextIndependentHMM = (SenoneHMM) lookupNearestHMM(ciUnit,
                HMMPosition.UNDEFINED, true);
        float[][] tmat = contextIndependentHMM.getTransitionMatrix();
        return new SenoneHMM(unit, compositeSequence, tmat, position);
    }


    /**
     * Given a unit, returns the HMM that best matches the given unit. If exactMatch is false and an exact match is not
     * found, then different word positions are used. If any of the contexts are non-silence filler units. a silence
     * filler unit is tried instead.
     *
     * @param unit       the unit of interest
     * @param position   the position of the unit of interest
     * @param exactMatch if true, only an exact match is acceptable.
     * @return the HMM that best matches, or null if no match could be found.
     */
    public HMM lookupNearestHMM(Unit unit, HMMPosition position,
                                boolean exactMatch) {

        if (exactMatch)
            return lookupHMM(unit, position);

        HMMManager mgr = loader.getHMMManager();
        HMM hmm = mgr.get(position, unit);

        if (hmm != null) {
            return hmm;
        }
        // no match, try a composite

        if (useComposites && hmm == null) {
            if (isComposite(unit)) {

                hmm = getCompositeHMM(unit, position);
                if (hmm != null) {
                    mgr.put(hmm);
                }
            }
        }
        // no match, try at other positions
        if (hmm == null) {
            hmm = getHMMAtAnyPosition(unit);
        }
        // still no match, try different filler
        if (hmm == null) {
            hmm = getHMMInSilenceContext(unit, position);
        }

        // still no match, backoff to base phone
        if (hmm == null) {
            Unit ciUnit = lookupUnit(unit.getName());

            assert unit.isContextDependent();
            if (ciUnit == null) {
                logger.severe("Can't find HMM for " + unit.getName());
            }
            assert ciUnit != null;
            assert !ciUnit.isContextDependent();

            hmm = mgr.get(HMMPosition.UNDEFINED, ciUnit);
        }

        assert hmm != null;

        // System.out.println("PROX match for "
        // 	+ unit + " at " + position + ":" + hmm);

        return hmm;
    }


    /**
     * Determines if a unit is a composite unit
     *
     * @param unit the unit to test
     * @return true if the unit is missing a right context
     */
    private boolean isComposite(Unit unit) {

        if (unit.isFiller()) {
            return false;
        }

        Context context = unit.getContext();
        if (context instanceof LeftRightContext) {
            LeftRightContext lrContext = (LeftRightContext) context;
            if (lrContext.getRightContext() == null) {
                return true;
            }
            if (lrContext.getLeftContext() == null) {
                return true;
            }
        }
        return false;
    }


    /**
     * Looks up the context independent unit given the name
     *
     * @param name the name of the unit
     * @return the unit or null if the unit was not found
     */
    private Unit lookupUnit(String name) {
        return loader.getContextIndependentUnits().get(name);
    }


    /**
     * Returns an iterator that can be used to iterate through all the HMMs of the acoustic model
     *
     * @return an iterator that can be used to iterate through all HMMs in the model. The iterator returns objects of
     *         type <code>HMM</code>.
     */
    public Iterator<HMM> getHMMIterator() {
        return loader.getHMMManager().iterator();
    }


    /**
     * Returns an iterator that can be used to iterate through all the CI units in the acoustic model
     *
     * @return an iterator that can be used to iterate through all CI units. The iterator returns objects of type
     *         <code>Unit</code>
     */
    public Iterator<Unit> getContextIndependentUnitIterator() {
        return loader.getContextIndependentUnits().values().iterator();
    }


    /**
     * Get a composite senone sequence given the unit.
     *
     * The unit should have a LeftRightContext, where one or two of 'left' or
     * 'right' may be null to indicate that the match should succeed on any
     * context.
     *
     * @param unit the unit
     * @param position position in HMM
     * @return senone sequence
     */
    public SenoneSequence getCompositeSenoneSequence(Unit unit,
                                                     HMMPosition position)
    {
        String unitStr = unit.toString();
        SenoneSequence compositeSenoneSequence;
        compositeSenoneSequence = compositeSenoneSequenceCache.get(unitStr);

        if (logger.isLoggable(Level.FINE))
            logger.fine("getCompositeSenoneSequence: "
                        + unit +
                        compositeSenoneSequence == null ? "" : "Cached");

        if (compositeSenoneSequence != null)
            return compositeSenoneSequence;

        // Iterate through all HMMs looking for
        // a) An hmm with a unit that has the proper base
        // b) matches the non-null context

        Context context = unit.getContext();
        List<SenoneSequence> senoneSequenceList;
        senoneSequenceList = new ArrayList<SenoneSequence>();

        // collect all senone sequences that match the pattern
        for (Iterator<HMM> i = getHMMIterator(); i.hasNext();) {
            SenoneHMM hmm = (SenoneHMM) i.next();
            if (hmm.getPosition() == position) {
                Unit hmmUnit = hmm.getUnit();
                if (hmmUnit.isPartialMatch(unit.getName(), context)) {
                    if (logger.isLoggable(Level.FINE)) {
                        logger.fine("collected: " + hmm.getUnit());
                    }
                    senoneSequenceList.add(hmm.getSenoneSequence());
                }
            }
        }

        // couldn't find any matches, so at least include the CI unit
        if (senoneSequenceList.isEmpty()) {
            Unit ciUnit = unitManager.getUnit(unit.getName(), unit.isFiller());
            SenoneHMM baseHMM = lookupHMM(ciUnit, HMMPosition.UNDEFINED);
            senoneSequenceList.add(baseHMM.getSenoneSequence());
        }

        // Add this point we have all of the senone sequences that
        // match the base/context pattern collected into the list.
        // Next we build a CompositeSenone consisting of all of the
        // senones in each position of the list.

        // First find the longest senone sequence

        int longestSequence = 0;
        for (SenoneSequence ss : senoneSequenceList) {
            if (ss.getSenones().length > longestSequence) {
                longestSequence = ss.getSenones().length;
            }
        }

        // now collect all of the senones at each position into
        // arrays so we can create CompositeSenones from them
        // QUESTION: is is possible to have different size senone
        // sequences. For now lets assume the worst case.

        List<CompositeSenone> compositeSenones = new ArrayList<CompositeSenone>();
        float logWeight = 0.0f;
        for (int i = 0; i < longestSequence; i++) {
            Set<Senone> compositeSenoneSet = new HashSet<Senone>();
            for (SenoneSequence senoneSequence : senoneSequenceList) {
                if (i < senoneSequence.getSenones().length) {
                    Senone senone = senoneSequence.getSenones()[i];
                    compositeSenoneSet.add(senone);
                }
            }
            compositeSenones.add(CompositeSenone.create(
                    compositeSenoneSet, logWeight));
        }

        compositeSenoneSequence = SenoneSequence.create(compositeSenones);
        compositeSenoneSequenceCache.put(unit.toString(),
                compositeSenoneSequence);

        if (logger.isLoggable(Level.FINE)) {
            logger.fine(unit + " consists of " + compositeSenones.size() + " composite senones");
            if (logger.isLoggable(Level.FINEST)) {
                compositeSenoneSequence.dump("am");
            }
        }
        return compositeSenoneSequence;
    }


    /**
     * Returns the size of the left context for context dependent units
     *
     * @return the left context size
     */
    public int getLeftContextSize() {
        return loader.getLeftContextSize();
    }


    /**
     * Returns the size of the right context for context dependent units
     *
     * @return the left context size
     */
    public int getRightContextSize() {
        return loader.getRightContextSize();
    }


    /**
     * Given a unit, returns the HMM that exactly matches the given unit.
     *
     * @param unit     the unit of interest
     * @param position the position of the unit of interest
     * @return the HMM that exactly matches, or null if no match could be found.
     */
    private SenoneHMM lookupHMM(Unit unit, HMMPosition position) {
        return (SenoneHMM) loader.getHMMManager().get(position, unit);
    }
    
    
    public Senone getSenone(long id) {
        return loader.getSenonePool().get((int)id);
    }

    /** Dumps information about this model to the logger */
    protected void logInfo() {
        if (loader != null) {
            loader.logInfo();
        }
        logger.info("CompositeSenoneSequences: " +
                compositeSenoneSequenceCache.size());
    }


    /**
     * Searches an hmm at any position
     *
     * @param unit the unit to search for
     * @return hmm the hmm or null if it was not found
     */
    private SenoneHMM getHMMAtAnyPosition(Unit unit) {
        HMMManager mgr = loader.getHMMManager();
        for (HMMPosition pos : HMMPosition.values()) {
            SenoneHMM hmm = (SenoneHMM)mgr.get(pos, unit);
            if (hmm != null)
                return hmm;
        }
        return null;
    }


    /**
     * Given a unit, search for the HMM associated with this unit by replacing all non-silence filler contexts with the
     * silence filler context
     *
     * @param unit the unit of interest
     * @return the associated hmm or null
     */
    private SenoneHMM getHMMInSilenceContext(Unit unit, HMMPosition position) {
        SenoneHMM hmm = null;
        HMMManager mgr = loader.getHMMManager();
        Context context = unit.getContext();

        if (context instanceof LeftRightContext) {
            LeftRightContext lrContext = (LeftRightContext) context;

            Unit[] lc = lrContext.getLeftContext();
            Unit[] rc = lrContext.getRightContext();

            Unit[] nlc;
            Unit[] nrc;

            if (hasNonSilenceFiller(lc)) {
                nlc = replaceNonSilenceFillerWithSilence(lc);
            } else {
                nlc = lc;
            }

            if (hasNonSilenceFiller(rc)) {
                nrc = replaceNonSilenceFillerWithSilence(rc);
            } else {
                nrc = rc;
            }

            if (nlc != lc || nrc != rc) {
                Context newContext = LeftRightContext.get(nlc, nrc);
                Unit newUnit = unitManager.getUnit(unit.getName(),
                        unit.isFiller(), newContext);
                hmm = (SenoneHMM) mgr.get(position, newUnit);
                if (hmm == null) {
                    hmm = getHMMAtAnyPosition(newUnit);
                }
            }
        }
        return hmm;
    }


    /**
     * Returns true if the array of units contains a non-silence filler
     *
     * @param units the units to check
     * @return true if the array contains a filler that is not the silence filler
     */
    private boolean hasNonSilenceFiller(Unit[] units) {
        if (units == null) {
            return false;
        }

        for (Unit unit : units) {
            if (unit.isFiller() &&
                !unit.equals(UnitManager.SILENCE)) {
                return true;
            }
        }
        return false;
    }


    /**
     * Returns a unit array with all non-silence filler units replaced with the silence filler a non-silence filler
     *
     * @param context the context to check
     * @return true if the array contains a filler that is not the silence filler
     */
    private Unit[] replaceNonSilenceFillerWithSilence(Unit[] context) {
        Unit[] replacementContext = new Unit[context.length];
        for (int i = 0; i < context.length; i++) {
            if (context[i].isFiller() &&
                    !context[i].equals(UnitManager.SILENCE)) {
                replacementContext[i] = UnitManager.SILENCE;
            } else {
                replacementContext[i] = context[i];
            }
        }
        return replacementContext;
    }


    /**
     * Returns the properties of this acoustic model.
     *
     * @return the properties of this acoustic model
     */
    public Properties getProperties() {
        if (properties == null) {
            properties = new Properties();
            try {
                properties.load
                        (TiedStateAcousticModel.class.getResource("model.props").openStream());
            } catch (IOException ioe) {
                ioe.printStackTrace();
            }
        }
        return properties;
    }
}
